# "Targeting Coethnic Voters, Elites or Both?"
# Descriptive statistics
# Dongil Lee
# 10/4/2024

library(haven)
library(ggplot2)
library(tidyr)
library(dplyr)

##########################################
#           Multiplot function           #
##########################################

# Draw several plot objects on one page, arranged in a grid layout.
#
# Args:
#   ...:      Plot objects to draw (anything with a print method that accepts
#             a `vp` viewport argument, e.g. ggplot objects).
#   plotlist: Optional list of further plot objects, appended after `...`.
#   file:     Unused; kept so that existing calls passing it keep working.
#   cols:     Number of columns in the layout. Ignored when `layout` is given.
#   layout:   Optional matrix of plot indices. Every cell holding the value i
#             belongs to the region where plot i is drawn. When NULL, a
#             column-major matrix with `cols` columns is built.
#
# Returns:
#   Invisibly, NULL when nothing or several plots are drawn; with exactly one
#   plot, whatever print() returns for it.
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
  # Combine the ... arguments and plotlist into one list
  plots <- c(list(...), plotlist)
  num_plots <- length(plots)

  # Nothing to draw: return before opening a page. Without this guard the
  # drawing loop would try to access plots[[1]] on an empty list and fail.
  if (num_plots == 0) {
    return(invisible(NULL))
  }

  # If layout is NULL, use 'cols' to determine the layout:
  # ncol is the number of columns, nrow the number of rows needed to fit all
  # plots given that many columns.
  if (is.null(layout)) {
    n_rows <- ceiling(num_plots / cols)
    layout <- matrix(seq(1, cols * n_rows), ncol = cols, nrow = n_rows)
  }

  if (num_plots == 1) {
    print(plots[[1]])
  } else {
    # Set up the page with one viewport cell per layout cell.
    # grid functions are namespace-qualified so that calling multiplot() does
    # not attach the grid package to the search path.
    grid::grid.newpage()
    grid::pushViewport(
      grid::viewport(layout = grid::grid.layout(nrow(layout), ncol(layout)))
    )

    # Draw each plot in the layout region(s) that carry its index
    for (i in seq_len(num_plots)) {
      # Row/column positions of the cells assigned to plot i
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))

      print(plots[[i]], vp = grid::viewport(layout.pos.row = matchidx$row,
                                            layout.pos.col = matchidx$col))
    }
  }
}

# Project directories. Full paths are handed to the reader/writer calls below
# instead of calling setwd(), so the session's working directory is left alone.
data_dir <- "/Users/dlee/Dropbox/Work/NYU/2016 Summer/MA_thesis/combined_data"
graph_dir <- "/Users/dlee/Dropbox/Work/NYU/2016 Summer/MA_thesis/graphs"

# Read the .dta file
df <- haven::read_dta(file.path(data_dir, "data_with_MP.dta"))

# Descriptive statistics plot (Foreign aid)
# Figure A1.

# Aggregate by year: total disbursement and total population across all rows
aid.by.year <- df %>%
  select(d_total, pop, year) %>%
  group_by(year) %>%
  summarize(across(c(d_total, pop), sum))

# Calculate log(aid pc) from the yearly totals
aid.by.year$logaidpc <- log(aid.by.year$d_total / aid.by.year$pop)

# Aid level by year. The dashed vertical lines sit between 2003/2004 and
# 2008/2009.
fig_a1 <- ggplot(aid.by.year, aes(year, logaidpc)) +
  geom_point() +
  geom_line() +
  ylim(0, 8) +
  theme_bw() +
  xlab("Year") +
  ylab("Aid pc (log)") +
  scale_x_continuous(breaks = 1999:2010) +
  geom_vline(xintercept = 2003.5, colour = "black", linetype = "dashed") +
  geom_vline(xintercept = 2008.5, colour = "black", linetype = "dashed") +
  theme(axis.text = element_text(size = 20),
        axis.title = element_text(size = 20))

# Show the figure, then save exactly this object rather than relying on
# ggsave()'s implicit last_plot() default.
print(fig_a1)
ggsave(file.path(graph_dir, "aid_by_year.jpg"), plot = fig_a1,
       width = 10, height = 8)

# Figure A2.

# Output directory for the Figure A2 image files. Full paths are passed to the
# writers below, so no setwd() call is needed.
graph_dir <- "/Users/dlee/Dropbox/Work/NYU/2016 Summer/MA_thesis/graphs"

# Indicator: 1 when the row's total disbursement is positive, 0 otherwise
# (NA when d_total is missing)
df$dummy_d_aid <- ifelse(df$d_total > 0, 1, 0)

# Subset (year, log_d_aidpc, d_total, dummy_d_aid) and add the regime variable.
# Years outside 1999-2010 match no condition and get NA.
dfg <- df %>%
  select(year, log_d_aidpc, d_total, dummy_d_aid) %>%
  mutate(regime = case_when(
    year >= 1999 & year <= 2003 ~ "Muluzi Regime",
    year >= 2004 & year <= 2008 ~ "Mutharika I Regime",
    year >= 2009 & year <= 2010 ~ "Mutharika II Regime"
  ))

# Box plot of log aid per capita by regime
q1 <- ggplot(dfg, aes(regime, log_d_aidpc)) +
  geom_boxplot(fill = "grey80") +
  theme_bw() +
  xlab("Regime") +
  ylab("Aid pc (log)") +
  ylim(0, 8) +
  theme(axis.text = element_text(size = 10),
        axis.title = element_text(size = 10))

# q1 is never printed, so name the plot explicitly instead of depending on
# ggsave()'s last_plot() default.
ggsave(file.path(graph_dir, "aid_by_regime_boxplot.jpg"), plot = q1,
       width = 12, height = 9)

# Per regime: number of rows with positive aid, number of rows, and their ratio
dfg2 <- dfg %>%
  group_by(regime) %>%
  summarise(dummy_d_aid = sum(dummy_d_aid), count = n())
dfg2$percent <- dfg2$dummy_d_aid / dfg2$count

# What share of observations received aid under each regime?
q2 <- ggplot(dfg2, aes(regime, percent)) +
  theme_bw() +
  geom_bar(stat = "identity") +
  xlab("Regime") +
  ylab("Probability of Receiving Aid") +
  theme(axis.text = element_text(size = 10),
        axis.title = element_text(size = 10))

ggsave(file.path(graph_dir, "aid_prob_by_regime.jpg"), plot = q2,
       width = 12, height = 9)

# Both panels side by side in a single PNG
png(file.path(graph_dir, "dist_aid.png"),
    width = 24, height = 9, units = "cm", res = 320)
multiplot(q1, q2, cols = 2)
dev.off()

# Parallel trends assumption plot
# Figures A9-A11.

# Output directory for the parallel-trends figures. Full paths are passed to
# ggsave(), so no setwd() call is needed.
trend_dir <- "/Users/dlee/Dropbox/Work/NYU/2016 Summer/MA_thesis/graphs2"

# Plot and save the yearly mean of d_aidpc, split by one coethnicity indicator.
#
# Args:
#   data:         Data frame containing `year`, `d_aidpc` and `coethnic_var`.
#   coethnic_var: Name (string) of the indicator column to split by; rows
#                 where it is missing are dropped.
#   vline_years:  Years at which dashed vertical lines are drawn.
#   y_max:        Upper limit of the y axis (lower limit is 0).
#   label:        Text of the annotation label.
#   label_x, label_y: Position of the annotation label.
#   outfile:      File name of the saved figure inside `trend_dir`.
#
# Returns:
#   Invisibly, the aggregated data frame (one row per indicator value and year).
save_parallel_trend <- function(data, coethnic_var, vline_years, y_max,
                                label, label_x, label_y, outfile) {
  group_cols <- c(coethnic_var, "year")

  # .groups = "drop" returns an ungrouped result and silences the regrouping
  # message summarize() emits after a multi-variable group_by().
  yearly_means <- data %>%
    tidyr::drop_na(dplyr::all_of(coethnic_var)) %>%
    dplyr::group_by(dplyr::across(dplyr::all_of(group_cols))) %>%
    dplyr::summarize(d_aidpc = mean(d_aidpc), .groups = "drop")

  trend_plot <- ggplot(
    yearly_means,
    aes(y = d_aidpc, x = year, color = factor(.data[[coethnic_var]]))
  ) +
    geom_line(size = 1) +
    ylim(0, y_max) +
    geom_vline(xintercept = vline_years, linetype = "dashed") +
    xlab("Year") +
    ylab("Aid disbursement pc") +
    labs(color = "Coethnic MP?") +
    theme_bw() +
    annotate("label", x = label_x, y = label_y, label = label)

  # Show the figure, then save exactly this object rather than relying on
  # ggsave()'s implicit last_plot() default.
  print(trend_plot)
  ggsave(file.path(trend_dir, outfile), plot = trend_plot,
         width = 16, height = 8, units = "cm")

  invisible(yearly_means)
}

gdf1 <- save_parallel_trend(
  df, "MPcoethnic1",
  vline_years = c(1999, 2004), y_max = 900,
  label = "Muluzi I", label_x = 2001.5, label_y = 700,
  outfile = "parallel_trends1.pdf"
)

gdf2 <- save_parallel_trend(
  df, "MPcoethnic2",
  vline_years = c(2004, 2009), y_max = 800,
  label = "Mutharika I", label_x = 2007, label_y = 600,
  outfile = "parallel_trends2.pdf"
)

gdf3 <- save_parallel_trend(
  df, "MPcoethnic3",
  vline_years = c(2009, 2010), y_max = 800,
  label = "Mutharika II", label_x = 2009, label_y = 600,
  outfile = "parallel_trends3.pdf"
)